In [1]:
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Clicca qui per nascondere/ mostrare il codice"></form>''')
Out[1]:
In [2]:
import datetime

print(datetime.datetime.today())
2020-10-21 15:17:01.636575
In [3]:
import pandas as pd
import numpy as np
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px


import warnings
warnings.filterwarnings('ignore')
In [4]:
url_r = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni.csv"
data_region = pd.read_csv(url_r)
#print(data_region.dtypes)
#print(data_region.isnull().sum())
#print(data_region.shape)
#print(data_region.head())

url_p = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-province/dpc-covid19-ita-province.csv"
data_province = pd.read_csv(url_p)
#print(data_province.dtypes)
#print(data_province.isnull().sum())
#print(data_province.shape)
#print(data_province.head())

url_n = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-andamento-nazionale/dpc-covid19-ita-andamento-nazionale.csv"
data_national = pd.read_csv(url_n)
#print(data_national.dtypes)
#print(data_national.isnull().sum())
#print(data_national.shape)
#print(data_national.head())
In [5]:
#add daily cases/deaths/recovered columns
data_national['new_cases'] = data_national['totale_casi'].diff()
data_national['new_deaths'] = data_national['deceduti'].diff()
data_national['new_recovered'] = data_national['dimessi_guariti'].diff()
data_national['new_swabs'] = data_national['tamponi'].diff()
data_national['new_unique_tested'] = data_national['casi_testati'].diff()
#add a day/day-1 percentage change for new_cases
data_national['daily_cases_perc_change'] = round((data_national['new_cases'].pct_change(1))*100,2)
data_national['daily_swab_perc_change'] = round((data_national['new_swabs'].pct_change(1))*100,2)
data_national['daily_unique_tested_perc_change'] = round((data_national['new_unique_tested'].pct_change(1))*100,2)
#detect ratio
data_national['detect_ratio_swabs'] = round((data_national['new_cases'] / data_national['new_swabs'])*100,2)
data_national['detect_ratio_cases'] = round((data_national['new_cases'] / data_national['new_unique_tested'])*100,2)
data_national.tail(10)
Out[5]:
data stato ricoverati_con_sintomi terapia_intensiva totale_ospedalizzati isolamento_domiciliare totale_positivi variazione_totale_positivi nuovi_positivi dimessi_guariti ... new_cases new_deaths new_recovered new_swabs new_unique_tested daily_cases_perc_change daily_swab_perc_change daily_unique_tested_perc_change detect_ratio_swabs detect_ratio_cases
230 2020-10-11T17:00:00 ITA 4519 420 4939 74136 79075 4246 5456 239709 ... 5456.0 26.0 1184.0 104658.0 68708.0 -4.68 -21.36 -14.23 5.21 7.94
231 2020-10-12T17:00:00 ITA 4821 452 5273 77491 82764 3689 4619 240600 ... 4619.0 39.0 891.0 85442.0 59649.0 -15.34 -18.36 -13.18 5.41 7.74
232 2020-10-13T17:00:00 ITA 5076 514 5590 81603 87193 4429 5901 242028 ... 5898.0 41.0 1428.0 112544.0 70260.0 27.69 31.72 17.79 5.24 8.39
233 2020-10-14T17:00:00 ITA 5470 539 6009 86436 92445 5252 7332 244065 ... 7332.0 43.0 2037.0 152196.0 87601.0 24.31 35.23 24.68 4.82 8.37
234 2020-10-15T17:00:00 ITA 5796 586 6382 92884 99266 6821 8804 245964 ... 8803.0 83.0 1899.0 162932.0 98542.0 20.06 7.05 12.49 5.40 8.93
235 2020-10-16T17:00:00 ITA 6178 638 6816 100496 107312 8046 10010 247872 ... 10009.0 55.0 1908.0 150377.0 92402.0 13.70 -7.71 -6.23 6.66 10.83
236 2020-10-17T17:00:00 ITA 6617 705 7322 109613 116935 9623 10925 249127 ... 10925.0 47.0 1255.0 165837.0 103326.0 9.15 10.28 11.82 6.59 10.57
237 2020-10-18T17:00:00 ITA 7131 750 7881 118356 126237 9302 11705 251461 ... 11705.0 69.0 2334.0 146541.0 95554.0 7.14 -11.64 -7.52 7.99 12.25
238 2020-10-19T17:00:00 ITA 7676 797 8473 125530 134003 7766 9338 252959 ... 9337.0 73.0 1498.0 98862.0 65824.0 -20.23 -32.54 -31.11 9.44 14.18
239 2020-10-20T17:00:00 ITA 8454 870 9324 133415 142739 8736 10874 255005 ... 10871.0 89.0 2046.0 144737.0 87680.0 16.43 46.40 33.20 7.51 12.40

10 rows × 27 columns

In [6]:
#regional data preparation

data_region_Abruzzo = data_region[(data_region['denominazione_regione'] == 'Abruzzo')]
data_region_Basilicata = data_region[(data_region['denominazione_regione'] == 'Basilicata')]
data_region_Bolzano = data_region[(data_region['denominazione_regione'] == 'P.A. Bolzano')]
data_region_Calabria = data_region[(data_region['denominazione_regione'] == 'Calabria')]
data_region_Campania = data_region[(data_region['denominazione_regione'] == 'Campania')]
data_region_EmiliaR = data_region[(data_region['denominazione_regione'] == 'Emilia-Romagna')]
data_region_Friuli = data_region[(data_region['denominazione_regione'] == 'Friuli Venezia Giulia')]
data_region_Lazio = data_region[(data_region['denominazione_regione'] == 'Lazio')]
data_region_Liguria = data_region[(data_region['denominazione_regione'] == 'Liguria')]
data_region_Lombardia = data_region[(data_region['denominazione_regione'] == 'Lombardia')]
data_region_Marche = data_region[(data_region['denominazione_regione'] == 'Marche')]
data_region_Molise = data_region[(data_region['denominazione_regione'] == 'Molise')]
data_region_Piemonte = data_region[(data_region['denominazione_regione'] == 'Piemonte')]
data_region_Puglia = data_region[(data_region['denominazione_regione'] == 'Puglia')]
data_region_Sardegna = data_region[(data_region['denominazione_regione'] == 'Sardegna')]
data_region_Sicilia = data_region[(data_region['denominazione_regione'] == 'Sicilia')]
data_region_Toscana = data_region[(data_region['denominazione_regione'] == 'Toscana')]
data_region_Trento = data_region[(data_region['denominazione_regione'] == 'P.A. Trento')]
data_region_Umbria = data_region[(data_region['denominazione_regione'] == 'Umbria')]
data_region_VAosta = data_region[(data_region['denominazione_regione'] == "Valle d'Aosta")]
data_region_Veneto = data_region[(data_region['denominazione_regione'] == 'Veneto')]

def region_apply(region):
    for x in region:
        x['new_cases'] =  x['totale_casi'].diff()
        x['new_deaths'] = x['deceduti'].diff()
        x['new_recovered'] = x['dimessi_guariti'].diff()
        x['new_swabs'] = x['tamponi'].diff()
        #add a day/day-1 percentage change for new_cases
        x['daily_cases_perc_change'] = round((x['new_cases'].pct_change(1))*100,2)
        x['daily_swab_perc_change'] = round((x['new_swabs'].pct_change(1))*100,2)
        #detect ratio
        x['detect_ratio'] = round((x['new_cases'] / x['new_swabs'])*100,2)
        return; 

region_apply([data_region_Abruzzo])  
region_apply([data_region_Basilicata]) 
region_apply([data_region_Bolzano])
region_apply([data_region_Calabria])
region_apply([data_region_Campania])
region_apply([data_region_EmiliaR])
region_apply([data_region_Friuli])
region_apply([data_region_Lazio])
region_apply([data_region_Liguria])
region_apply([data_region_Lombardia])
region_apply([data_region_Marche])
region_apply([data_region_Molise])
region_apply([data_region_Piemonte])
region_apply([data_region_Puglia])
region_apply([data_region_Sardegna])
region_apply([data_region_Sicilia])
region_apply([data_region_Toscana])
region_apply([data_region_Trento])
region_apply([data_region_VAosta])
region_apply([data_region_Veneto])
In [7]:
data_region_Nordovest = data_region[(data_region.denominazione_regione.isin(['Piemonte', 'Lombardia', 'Liguria',"Valle d'Aosta"]))]
data_region_Nordest = data_region[(data_region.denominazione_regione.isin(['Emilia-Romagna', 'P.A. Bolzano', 'P.A. Trento', 'Veneto', 'Friuli Venezia Giulia']))]
data_region_Centro = data_region[(data_region.denominazione_regione.isin(['Toscana', 'Umbria', 'Marche', 'Lazio']))]
data_region_Sudisole = data_region[(data_region.denominazione_regione.isin(['Abruzzo', 'Molise', 'Campania', 'Puglia', 'Basilicata', 'Calabria', 'Sicilia', 'Sardegna']))]

cases_Nordovest = data_region_Nordovest.groupby('data').sum()
region_apply([cases_Nordovest])  
cases_Nordovest['data'] = cases_Nordovest.index

cases_Nordest = data_region_Nordest.groupby('data').sum()
region_apply([cases_Nordest])  
cases_Nordest['data'] = cases_Nordest.index

cases_Centro = data_region_Centro.groupby('data').sum()
region_apply([cases_Centro])  
cases_Centro['data'] = cases_Centro.index

cases_Sudisole = data_region_Sudisole.groupby('data').sum()
region_apply([cases_Sudisole])  
cases_Sudisole['data'] = cases_Sudisole.index

cases_Nordovest.tail(5)
Out[7]:
codice_regione lat long ricoverati_con_sintomi terapia_intensiva totale_ospedalizzati isolamento_domiciliare totale_positivi variazione_totale_positivi nuovi_positivi ... tamponi casi_testati new_cases new_deaths new_recovered new_swabs daily_cases_perc_change daily_swab_perc_change detect_ratio data
data
2020-10-16T17:00:00 13 180.689065 33.123883 1852 150 2002 28807 30809 2978 3852 ... 3681229 2238296.0 3852.0 17.0 857.0 45451.0 7.03 -12.96 8.48 2020-10-16T17:00:00
2020-10-17T17:00:00 13 180.689065 33.123883 2025 187 2212 32309 34521 3712 4178 ... 3726942 2263052.0 4178.0 16.0 450.0 45713.0 8.46 0.58 9.14 2020-10-17T17:00:00
2020-10-18T17:00:00 13 180.689065 33.123883 2267 206 2473 34941 37414 2893 4541 ... 3771332 2291990.0 4541.0 26.0 1622.0 44390.0 8.69 -2.89 10.23 2020-10-18T17:00:00
2020-10-19T17:00:00 13 180.689065 33.123883 2481 212 2693 37117 39810 2396 3078 ... 3797797 2309178.0 3078.0 16.0 666.0 26465.0 -32.22 -40.38 11.63 2020-10-19T17:00:00
2020-10-20T17:00:00 13 180.689065 33.123883 2846 222 3068 40327 43395 3585 4401 ... 3839186 2334090.0 4401.0 31.0 785.0 41389.0 42.98 56.39 10.63 2020-10-20T17:00:00

5 rows × 25 columns

In [8]:
fig2 = px.bar(data_national, x='data', y='totale_casi',
             hover_data=['totale_casi'], color='totale_casi',
             height=600, color_continuous_scale='Sunsetdark')

fig2.update_layout(title_text='Total COVID19 Cases - Italy',
                  xaxis_rangeslider_visible=True)
fig2.update_yaxes(tick0=0, dtick=25000,  gridcolor='White')
fig2.show()
In [9]:
fig22 = px.bar(data_national, x='data', y='totale_positivi',
             hover_data=['totale_positivi'], color='totale_positivi',
             height=600, color_continuous_scale='Sunsetdark')

fig22.update_layout(title_text='Active COVID19 Cases - Italy',
                  xaxis_rangeslider_visible=True)
fig22.update_yaxes(tick0=0, dtick=10000,  gridcolor='White')
fig22.show()
In [10]:
fig = go.Figure()

fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Nordovest['data'], y=cases_Nordovest['new_cases'], name="North-West",
                         line_color='red'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Nordest['data'], y=cases_Nordest['new_cases'], name="North-East",
                         line_color='green'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Centro['data'], y=cases_Centro['new_cases'], name="Center",
                         line_color='darkviolet'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Sudisole['data'], y=cases_Sudisole['new_cases'], name="South and Islands",
                         line_color='darkblue'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_cases'], name="All Italy",
                         line_color='deepskyblue'))

fig.update_layout(title_text='Daily Coronavirus new cases - All Italy and Regions',
                  xaxis_rangeslider_visible=True)


fig.show()
In [11]:
fig = go.Figure()

fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Nordovest['data'], y=cases_Nordovest['new_swabs'], name="North-West",
                         line_color='red'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Nordest['data'], y=cases_Nordest['new_swabs'], name="North-East",
                         line_color='green'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Centro['data'], y=cases_Centro['new_swabs'], name="Center",
                         line_color='darkviolet'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Sudisole['data'], y=cases_Sudisole['new_swabs'], name="South and Islands",
                         line_color='darkblue'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_swabs'], name="All Italy",
                         line_color='deepskyblue'))

fig.update_layout(title_text='Daily swabs - All Italy and Regions',
                  xaxis_rangeslider_visible=True)


fig.show()
In [12]:
fig = go.Figure()


fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_deaths'], name="Daily Deaths",
                         line_color='red'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_recovered'], name="Daily Recovered",
                         line_color='green'))



fig.update_layout(title_text='Daily Coronavirus Deaths and Recoveries - Italy',
                  xaxis_rangeslider_visible=True)

fig.update_yaxes(tick0=0, dtick=500)

fig.show()
In [13]:
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_cases'], name="Daily Cases",
                         line_color='deepskyblue'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_swabs'], name="Daily swabs",
                         line_color='purple'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_unique_tested'], name="Daily unique tested",
                         line_color='red'))
fig.update_layout(title_text='Daily Coronavirus new cases and swabs - Italy',
                  xaxis_rangeslider_visible=True)

fig.update_yaxes(tick0=0, dtick=10000)

fig.show()
In [14]:
fig3 = go.Figure()

fig3.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['detect_ratio_swabs'], name="Daily detect ratio - Italy",
                         line_color='purple'))
fig3.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['detect_ratio_cases'], name="Daily unique detect ratio - Italy",
                         line_color='red'))


fig3.update_layout(title_text="Daily Swabs detect ratio - Italy",
                  xaxis_rangeslider_visible=True)
fig3.update_yaxes(dtick=5)
In [15]:
fig4 = go.Figure()

#fig4.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['terapia_intensiva'], name="Daily total UTI - Italy",
#                         line_color='blue'))
fig4.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['totale_ospedalizzati'], name="Daily total Hospital - Italy",
                         line_color='green'))

fig4.update_layout(title_text="Daily Total Hospital - Italy",
                  xaxis_rangeslider_visible=True)
fig4.update_yaxes(dtick=2000)
In [16]:
fig5 = go.Figure()

fig5.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['terapia_intensiva'], name="Daily total UTI - Italy",
                         line_color='blue'))

fig5.update_layout(title_text="Daily Total UTI - Italy",
                  xaxis_rangeslider_visible=True)
fig5.update_yaxes(dtick=200)
In [17]:
fig6 = go.Figure()

fig6.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['daily_cases_perc_change'], name="Daily cases percentual change - Italy",
                         line_color='purple'))
fig6.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['daily_swab_perc_change'], name="Daily swab percentual change - Italy",
                         line_color='red'))


fig6.update_layout(title_text="Daily v- Italy",
                  xaxis_rangeslider_visible=True)
fig6.update_yaxes(dtick=40)
In [18]:
fig7 = go.Figure()

fig7.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_recovered'], name="Daily new recovered - Italy",
                         line_color='purple'))
fig7.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_deaths'], name="Daily new deaths - Italy",
                         line_color='red'))
fig7.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_cases'], name="Daily new cases - Italy",
                         line_color='green'))

fig7.update_layout(title_text="Daily change - Italy",
                  xaxis_rangeslider_visible=True)
fig7.update_yaxes(dtick=500)
In [19]:
fig8 = go.Figure()

fig8.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['terapia_intensiva'], name="Daily total UTI - Italy",
                         line_color='purple'))
fig8.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_deaths'], name="Daily new deaths - Italy",
                         line_color='red'))


fig8.update_layout(title_text="Daily UTI vs  Death - Italy",
                  xaxis_rangeslider_visible=True)
fig8.update_yaxes(dtick=200)
In [20]:
data_ge=data_province[data_province['sigla_provincia']=='GE']
#print(data_ge)
fig9 = go.Figure()

fig9.add_trace(go.Scatter(mode = "lines+markers", x=data_ge['data'], y=data_ge['totale_casi'], name="Daily cases GE - Italy",
                         line_color='red'))



fig9.update_layout(title_text="Daily cases GE - Italy",
                  xaxis_rangeslider_visible=True)
fig9.update_yaxes(dtick=1000)
In [21]:
fig10 = go.Figure()

fig10.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['totale_casi'], name="Daily cases Liguria - Italy",
                         line_color='red'))



fig10.update_layout(title_text="Daily cases Liguria - Italy",
                  xaxis_rangeslider_visible=True)
fig10.update_yaxes(dtick=1000)
In [22]:
fig11 = go.Figure()

fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['terapia_intensiva'], name="Daily UTI Liguria - Italy",
                         line_color='red'))
fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['ricoverati_con_sintomi'], name="Daily hospital Liguria - Italy",
                         line_color='purple'))
fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_deaths'], name="Daily new deaths Liguria - Italy",
                         line_color='green'))
fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['nuovi_positivi'], name="Daily new cases Liguria - Italy",
                         line_color='blue'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_swabs'], name="Daily new swabs Liguria - Italy",
#line_color='black'))
fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_recovered'], name="Daily new recovered Liguria - Italy",
                         line_color='yellow'))

fig11.update_layout(title_text="Daily change Liguria - Italy",
                  xaxis_rangeslider_visible=True)
fig11.update_yaxes(dtick=100)
In [23]:
fig12 = go.Figure()

#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['daily_cases_perc_change'], name="Daily UTI Liguria - Italy",
#                         line_color='red'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['daily_swab_perc_change'], name="Daily hospital Liguria - Italy",
#                         line_color='purple'))
fig12.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['detect_ratio'], name="Daily detect  ratio Liguria - Italy",
                         line_color='green'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['nuovi_positivi'], name="Daily new cases Liguria - Italy",
#                         line_color='blue'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_swabs'], name="Daily new swabs Liguria - Italy",
#line_color='black'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_recovered'], name="Daily new recovered Liguria - Italy",
#                         line_color='yellow'))

fig12.update_layout(title_text="Daily detect ratio Liguria - Italy",
                  xaxis_rangeslider_visible=True)
fig12.update_yaxes(dtick=20)
In [24]:
fig13 = go.Figure()

fig13.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['daily_cases_perc_change'], name="Daily cases perc change Liguria - Italy",
                         line_color='red'))
fig13.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['daily_swab_perc_change'], name="Daily swab perc change Liguria - Italy",
                         line_color='purple'))


fig13.update_layout(title_text="Daily percentual change Liguria - Italy",
                  xaxis_rangeslider_visible=True)
fig13.update_yaxes(dtick=100)
In [25]:
print(data_national.dtypes)
data                                object
stato                               object
ricoverati_con_sintomi               int64
terapia_intensiva                    int64
totale_ospedalizzati                 int64
isolamento_domiciliare               int64
totale_positivi                      int64
variazione_totale_positivi           int64
nuovi_positivi                       int64
dimessi_guariti                      int64
deceduti                             int64
casi_da_sospetto_diagnostico       float64
casi_da_screening                  float64
totale_casi                          int64
tamponi                              int64
casi_testati                       float64
note                                object
new_cases                          float64
new_deaths                         float64
new_recovered                      float64
new_swabs                          float64
new_unique_tested                  float64
daily_cases_perc_change            float64
daily_swab_perc_change             float64
daily_unique_tested_perc_change    float64
detect_ratio_swabs                 float64
detect_ratio_cases                 float64
dtype: object
In [ ]:
 
In [ ]:
 
In [26]:
import pandas as pd
import numpy as np
import datetime
import requests
import warnings

import matplotlib.pyplot as plt
import matplotlib
import matplotlib.dates as mdates
import seaborn as sns
import plotly.offline as py


from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import OrdinalEncoder
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from statsmodels.tsa.arima_model import ARIMA
from fbprophet import Prophet
from fbprophet.plot import plot_plotly, add_changepoints_to_plot

from IPython.display import Image
warnings.filterwarnings('ignore')
%matplotlib inline
In [27]:
datas = pd.DataFrame(columns = ['ds','y'])
datas['ds'] = data_national['data']
datas['y'] = data_national['new_cases']

prop=Prophet()
prop.fit(datas)
future=prop.make_future_dataframe(periods=3)
prop_forecast=prop.predict(future)
forecast = prop_forecast[['ds','yhat']].tail(30)

fig = plot_plotly(prop, prop_forecast)
fig = prop.plot(prop_forecast,xlabel='Date',ylabel='Confirmed Cases')
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
In [28]:
def fit_predict_model(dataframe, interval_width = 0.99, changepoint_range = 0.8):
    m = Prophet(daily_seasonality = False, yearly_seasonality = False, weekly_seasonality = False)
    m = m.fit(dataframe)
    
    forecast = m.predict(dataframe)
    forecast['fact'] = dataframe['y'].reset_index(drop = True)
    print('Displaying Prophet plot')
    fig1 = m.plot(forecast)
    m.plot_components(forecast)
    return forecast
    
pred = fit_predict_model(datas)
Displaying Prophet plot
In [29]:
def detect_anomalies(forecast):
    forecasted = forecast[['ds','trend', 'yhat', 'yhat_lower', 'yhat_upper', 'fact']].copy()
    #forecast['fact'] = df['y']

    forecasted['anomaly'] = 0
    forecasted.loc[forecasted['fact'] > forecasted['yhat_upper'], 'anomaly'] = 1
    forecasted.loc[forecasted['fact'] < forecasted['yhat_lower'], 'anomaly'] = -1

    #anomaly importances
    forecasted['importance'] = 0
    forecasted.loc[forecasted['anomaly'] ==1, 'importance'] = \
        (forecasted['fact'] - forecasted['yhat_upper'])/forecast['fact']
    forecasted.loc[forecasted['anomaly'] ==-1, 'importance'] = \
        (forecasted['yhat_lower'] - forecasted['fact'])/forecast['fact']
    
    return forecasted

pred = detect_anomalies(pred)
pred.head()
Out[29]:
ds trend yhat yhat_lower yhat_upper fact anomaly importance
0 2020-02-24 18:00:00 2520.216686 2520.216686 789.812974 4274.270302 NaN 0 0.000000
1 2020-02-25 18:00:00 2533.649625 2533.649625 714.327492 4213.446146 93.0 -1 6.680941
2 2020-02-26 18:00:00 2547.082565 2547.082565 728.329081 4348.398827 78.0 -1 8.337552
3 2020-02-27 18:00:00 2560.515505 2560.515505 779.000394 4397.544767 250.0 -1 2.116002
4 2020-02-28 18:00:00 2573.948445 2573.948445 930.186467 4366.614088 238.0 -1 2.908347
In [30]:
future = prop.make_future_dataframe(periods=1)
future.tail()
forecast1 = prop.predict(future)
forecast1[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
Out[30]:
ds yhat yhat_lower yhat_upper
236 2020-10-17 17:00:00 5824.070431 4539.221642 7182.918319
237 2020-10-18 17:00:00 5792.227310 4375.034556 7147.577074
238 2020-10-19 17:00:00 5537.984036 4146.607794 6901.211550
239 2020-10-20 17:00:00 5755.224269 4347.524159 6984.446073
240 2020-10-21 17:00:00 5838.635632 4415.776846 7223.552433
In [ ]: